To complement the fixed window analysis focused on emergence and elimination events, we also want to show that the dynamics of EWS reflect those of \(R_E\). I estimated \(R_E\) over the observations via particle filtering and state estimation. Here, I read in those values for each city and correlate them with top-performing EWS. Top-performing EWS are those with high AUC in the emergence and elimination scenarios. Note that \(R_E\) was calculated as:
\[ R_E = \frac{\beta_t}{\gamma} \frac{S_t}{N_t}. \]
library(tidyverse)
Warning message:
In scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
EOF within quoted string
cities <- c("Agadez", "Maradi", "Niamey", "Zinder") # the four city names
reffs <- tibble() # empty tibble for storage
for(do_city in cities){
fname <- paste0("../../results/filtered-states-", do_city, ".RDS")
raw_filter <- readRDS(fname)
reff_id <- which(raw_filter$state == "effective_r_seasonal")
tmp_reff <- raw_filter$data[[reff_id]] %>%
dplyr::select(time, med, week, date) %>%
dplyr::mutate(city = do_city)
reffs <- bind_rows(reffs, tmp_reff)
}
ggplot(reffs, aes(x = date, y = med, color = city)) +
geom_hline(aes(yintercept = 1), linetype = 2, color = "grey35") +
geom_line() +
labs(x = "date", y = expression(italic(R)[E]),
title = "Effective reproduction number over time") +
ggthemes::scale_color_colorblind()
Now I calculate the EWS at each observation time using the spaero::get_stats() function. I use a bandwidth of 30 weeks and set backward_only = TRUE so that only past data are used when calculating EWS.
library(spaero)
fname <- "../../data/clean-data/weekly-measles-incidence-niger-cities-clean.RDS"
measles_data <- readRDS(fname) %>%
filter(year > 1994) # drop first NA year, only used for modeling
all_stats <- tibble() # empty tibble for storage
for(do_region in unique(measles_data$region)){
cases <- measles_data %>%
filter(region == do_region) %>%
pull(cases)
city_stats <- spaero::get_stats(
x = cases,
center_trend = "local_constant",
center_kernel = "uniform",
center_bandwidth = 30,
stat_trend = "local_constant",
stat_kernel = "uniform",
stat_bandwidth = 30,
lag = 1,
backward_only = TRUE
)$stats
city_stats_tb <- as_tibble(city_stats) %>%
mutate(
time_iter = 1:n(),
date = unique(measles_data$date)
) %>%
gather(key = ews, value = value, -time_iter, -date) %>%
mutate(region = do_region)
all_stats <- bind_rows(all_stats, city_stats_tb)
}
# best_ews <- c("autocorrelation", "autocovariance", "mean", "variance")
# all_stats <- all_stats %>%
# dplyr::filter(ews %in% best_ews)
Now I look at scatterplots of \(R_E\) versus EWS.
reffs <- reffs %>%
dplyr::mutate(region = paste0(city, " (City)")) %>%
dplyr::select(-city)
ews_reffs <- left_join(all_stats, reffs)
Joining, by = c("date", "region")
ews_reffs_subcrit <- ews_reffs %>%
filter(med < 1)
ggplot(ews_reffs_subcrit, aes(x = value, y = med, color = region)) +
geom_point(alpha = 0.2) +
facet_wrap(~ews, scales = "free") +
labs(x = "EWS value", y = expression(italic(R)[E])) +
ggthemes::scale_color_colorblind()
# for(do_region in unique(ews_reffs$region)){
# print(ggplot(filter(ews_reffs, region == do_region & ews == "variance"),
# aes(x = med, y = value, color = lubridate::week(date))) +
# geom_point() +
# facet_wrap(~lubridate::year(date), scales = "free") +
# labs(x = expression(R[E]), y = "EWS value", title = do_region) +
# scale_color_viridis_c(name = "week of year", direction = 1))
# }
No clear signal in those plots. Next I’ll look at the first difference of each EWS (x) between time t and \(t+1\). I calculate the difference in three ways:
ews_reffs_diffed <- ews_reffs %>%
group_by(ews, region) %>%
mutate(
diff_value = value - lag(value, 1),
ratio_value = value / lag(value, 1),
log_ratio_value = log(ratio_value)
) %>%
rename("Reff" = med)
NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs produced
ews_reffs_diffed %>%
filter(Reff < 1) %>%
ggplot(aes(y = Reff, x = diff_value)) +
geom_point(aes(color = lubridate::week(date)), alpha = 0.7) +
facet_wrap(ews~region, scales = "free", ncol = 4) +
scale_color_viridis_c(name = "Week of year", limits = c(1,52),
breaks = c(1,26,52)) +
labs(x = "First difference of EWS", y = expression(R[E])) +
theme(legend.position = "top")
ews_reffs_diffed %>%
filter(Reff < 1) %>%
ggplot(aes(y = Reff, x = ratio_value)) +
geom_point(aes(color = lubridate::week(date)), alpha = 0.7) +
facet_wrap(ews~region, scales = "free", ncol = 4) +
scale_color_viridis_c(name = "Week of year", limits = c(1,52),
breaks = c(1,26,52)) +
labs(x = "First ratio of EWS", y = expression(R[E])) +
theme(legend.position = "top")
ews_reffs_diffed %>%
filter(Reff < 1) %>%
ggplot(aes(y = Reff, x = log_ratio_value)) +
geom_point(aes(color = lubridate::week(date)), alpha = 0.7) +
facet_wrap(ews~region, scales = "free", ncol = 4) +
scale_color_viridis_c(name = "Week of year", limits = c(1,52),
breaks = c(1,26,52)) +
labs(x = "log(First ratio of EWS)", y = expression(R[E])) +
theme(legend.position = "top")
It looks like there might be trends when the EWS are actually “moving”, that is, when \(\text{log}\left(\frac{x(t)}{x(t-1)} \right) \ne 0\) or approximately so. So, I’ll remove those values that are close to zero by assuming all values less than \(|0.05|\) are essentially zero. Now some patterns emerge as expected.
ews_reffs_diffed %>%
filter(Reff < 1) %>%
filter(round(log_ratio_value,1) != 0) %>%
ggplot(aes(x = log_ratio_value, y = Reff)) +
geom_point(aes(color = lubridate::week(date)), alpha = 0.7) +
stat_smooth(method = "lm", se = FALSE, linetype = 1, color = "black") +
facet_wrap(ews~region, scales = "free", ncol = 4) +
scale_color_viridis_c(name = "Week of year", limits = c(1,52),
breaks = c(1,26,52)) +
labs(x = "log(First ratio of EWS)", y = expression(R[E])) +
theme(legend.position = "top")